package org.luosl.webmagicx.conf

import java.io.File

import org.luosl.webmagicx.utils.Logging
import org.luosl.webmagicx.conf.PropType._
import org.luosl.webmagicx.conf.MatcherConverters._
import play.api.libs.json.{JsObject, JsValue, Json, StaticBinding}

import scala.collection.immutable.Seq
import scala.io.Codec
import scala.xml._

/**
  * Created by luosl on 2017/11/7.
  */
object ConfLoader extends Logging{

  /**
    * Component kinds recognised by `parseConf`: pipeline, scheduler and handler.
    * Each entry is used as a tag suffix (via `SuffixMatcher`) when looking up children of `components`.
    */
  val defaultComponentTags:List[String] = List("pipeline", "scheduler", "handler")

  /**
    * Built-in component implementations, keyed by the XML tag that selects them.
    * The value is the fully qualified class name of the implementation; `parseConf` falls back to
    * the component's own `class` entry when its tag is not listed here.
    */
  val systemCompnentTags:Map[String, String] = Map(
    "csvPipeline" -> "org.luosl.webmagicx.pipeline.CSVPipeline",
    "jdbcPipeline" -> "org.luosl.webmagicx.pipeline.SimpleJdbcPipeline",
    "textHandler" -> "org.luosl.webmagicx.handler.TextHandler",
    "downloadHandler" -> "org.luosl.webmagicx.handler.DownloadHandler",
    "redisScheduler" -> "org.luosl.webmagicx.scheduler.PriorityRedisScheduler",
    "fileScheduler" -> "org.luosl.webmagicx.scheduler.PriorityFileScheduler"
  )

  /**
    * Loads and parses every spider configuration found at `confPath`.
    *
    * Candidate files are resolved by `listConfFile`; each one is read completely with the given
    * charset and then handed to `parseConf`.
    *
    * @param confPath path of a single configuration file, or of a directory containing them
    * @param suffix   file-name suffix used to select files when `confPath` is a directory
    * @param charset  name of the charset used to decode the files
    * @return one `SpiderConf` per configuration file, in the order returned by `listConfFile`
    * @throws RuntimeException when `confPath` is null (raised by `listConfFile`)
    */
  def listAllConf(confPath:String, suffix:String = "spider.xml",charset:String = "UTF-8"):Array[SpiderConf] = {
    // Reads one file and always releases its handle, even if decoding fails half-way.
    def readAll(file:File):String = {
      val source = scala.io.Source.fromFile(file)(Codec(charset))
      // Lines are joined without a separator, i.e. line terminators are not part of the result.
      try source.getLines().mkString finally source.close()
    }
    listConfFile(confPath, suffix).map(readAll).map(parseConf)
  }


  /**
    * Lists the spider configuration files located at `confPath`.
    *
    *  - a path that does not exist yields an empty array;
    *  - a directory yields its direct child files whose name ends with `suffix`
    *    (sub-directories are skipped, the directory is not searched recursively);
    *  - any other existing path is returned as the single element, regardless of `suffix`.
    *
    * @param confPath path of a configuration file or of a directory containing configuration files
    * @param suffix   file-name suffix a directory entry must end with to be selected
    * @return the matching files, possibly empty
    * @throws RuntimeException when `confPath` is null
    */
  def listConfFile(confPath:String, suffix:String): Array[File] ={
    if(null==confPath) throw new RuntimeException(s"无效的配置类路径:$confPath")
    val confFile:File = new File(confPath)
    if(!confFile.exists()){
      Array.empty[File]
    }else if(confFile.isDirectory){
      // File.listFiles() returns null (not an empty array) on I/O errors or missing permissions.
      val children:Array[File] = Option(confFile.listFiles()).getOrElse(Array.empty[File])
      // Only regular files can be read as configuration; a directory with a matching name is ignored.
      children.filter(child=> child.isFile && child.getName.endsWith(suffix))
    }else{
      Array(confFile)
    }
  }

  /**
    * Parses the XML text of one spider configuration into a `SpiderConf`.
    *
    * Sections are read in this order: id / enable / desc, start urls, target url regexes, task,
    * proxies, attributes, components, site and fields.
    *
    * @param xmlStr complete XML document of the configuration; it is also passed unchanged into the result
    * @return the parsed configuration
    * @throws ConfException when a start-url element carries a tag other than `url` or `rangeUrl`
    */
  def parseConf(xmlStr:String): SpiderConf ={
    // Wrap the document root so every section can be addressed by path.
    val conf:XmlProps = XmlProps(XML.loadString(xmlStr), "root")

    // Identity of the configuration; `enable` defaults to true and `desc` to the empty string.
    val id:String = conf.value("id", "text")(strType)
    val enable = conf.valueOption("enable")(booleanType).getOrElse(true)
    val desc:String = conf.valueOption("desc", "text")(strType).getOrElse("")

    // Start urls: the element tag decides which url-creator class backs the component.
    def urlCreatorClass(tag:String):String = tag match {
      case "url" => "org.luosl.webmagicx.urlcreator.StandardUrlCreator"
      case "rangeUrl" => "org.luosl.webmagicx.urlcreator.RangeUrlCreator"
      case other:String => throw ConfException(s"无效的起始Url类型:$other")
    }
    val startUrls:List[Component] =
      for(urlProp <- conf.props("startUrls", SuffixMatcher("url")))
        yield Component(urlCreatorClass(urlProp.tag), urlProp)

    // Target url regexes.
    val targetUrlRegexs:Seq[TargetUrlRegex] = conf.props("targetUrlRegexs", "regex").map{ regexProp=>
      // If several extract kinds are configured, the last one in this list wins.
      val lastExtract:Option[Extract] = List("xpath", "cssSelector", "jsonPath")
        .flatMap(kind=> regexProp.valueOption(kind)(strType).map(expr=> Extract(kind, expr)))
        .lastOption
      val priority:Long = regexProp.valueOrDefault("priority")(longType)(0)
      val regex:String = regexProp.value("text")(strType)
      TargetUrlRegex(lastExtract, priority, regex)
    }

    // Task: only the last `task` element is used.
    val task:Option[Task] = conf.props("task").lastOption.map{ taskProp=>
      val startNow:Boolean = taskProp.valueOrDefault("startNow")(booleanType)(true)
      // The key is spelled "corn" in the configuration format.
      val cron:String = taskProp.value("corn")(strType)
      Task(startNow, cron)
    }

    // Proxies: user and password are optional.
    val proxies:List[Proxy] = conf.props("proxies", "proxy").map{ proxyProp=>
      val host = proxyProp.value("host")(strType)
      val port = proxyProp.value("port")(intType)
      val user = proxyProp.valueOption("user")(strType)
      val password = proxyProp.valueOption("password")(strType)
      Proxy(host, port, user, password)
    }

    // Attributes: each one falls back to its DefaultConfValue counterpart.
    val attr:Attr = Attr(
      conf.valueOrDefault("attribute", "maxDeep", "text")(intType)(DefaultConfValue.maxDeep),
      conf.valueOrDefault("attribute", "charset", "text")(strType)(DefaultConfValue.charset),
      conf.valueOrDefault("attribute", "timeout", "text")(intType)(DefaultConfValue.timeout),
      conf.valueOrDefault("attribute", "threadNum", "text")(intType)(DefaultConfValue.threadNum),
      conf.valueOrDefault("attribute", "retryTimes", "text")(intType)(DefaultConfValue.retryTimes),
      conf.valueOrDefault("attribute", "sleep", "text")(intType)(DefaultConfValue.sleep)
    )

    // Components: pair every configured component with the kind whose suffix matched it.
    val taggedComponents:List[(String, Component)] = for{
      kind <- defaultComponentTags
      compProp <- conf.props("components", SuffixMatcher(kind))
    } yield {
      // Built-in tags resolve to a known class; anything else must name its class explicitly.
      val clazz:String = systemCompnentTags.getOrElse(compProp.tag, compProp.value("class", "text")(strType))
      kind -> Component(clazz, compProp)
    }
    val componentsByKind:Map[String, List[Component]] =
      taggedComponents.groupBy(_._1).map{ case (kind, tagged)=> kind -> tagged.map(_._2) }
    val handlers:List[Component] = componentsByKind.getOrElse("handler", List.empty)
    val pipelines:List[Component] = componentsByKind.getOrElse("pipeline", List.empty)
    // Only the last configured scheduler is kept.
    val scheduler:Option[Component] = componentsByKind.getOrElse("scheduler", List.empty).lastOption
    val components:Components = Components(handlers, pipelines, scheduler)

    // Site: user agent plus header and cookie key/value pairs.
    val userAgent:String = conf.valueOrDefault("site", "userAgent", "text")(strType)("")
    val headers:Map[String, String] =
      (for(header <- conf.props("site", "headers", "header"))
        yield header.value("key")(strType) -> header.value("value")(strType)).toMap
    val cookies:Map[String, String] =
      (for(cookie <- conf.props("site", "cookies", "cookie"))
        yield cookie.value("key")(strType) -> cookie.value("value")(strType)).toMap
    val site = Site(userAgent,headers,cookies)

    // Fields and their extract rules.
    val fields:Seq[Field] = conf.props("fields", "field").map{ fieldProp=>
      val name:String = fieldProp.value("name", "text")(strType)
      val scope:String = fieldProp.valueOrDefault("scope", "text")(strType)(DefaultConfValue.scope)
      val textFormat:Boolean = fieldProp.valueOrDefault("textFormat", "text")(booleanType)(DefaultConfValue.textFormat)
      val must:Boolean = fieldProp.valueOrDefault("must", "text")(booleanType)(DefaultConfValue.must)
      val extracts:Seq[Extract] = fieldProp.props("extract").map{ extractProp=>
        val kind:String = extractProp.valueOrDefault("type")(strType)(DefaultConfValue.extractType)
        Extract(kind, extractProp.value("expression")(strType))
      }
      Field(name, extracts, scope, textFormat, must)
    }

    SpiderConf(id, desc, startUrls, targetUrlRegexs, task, proxies,
      components, fields, site, attr, xmlStr, enable)
  }

}

